In [1]:
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
In [2]:
#incarceration rates from 2001-2016
incarceration= pd.read_csv("crime_and_incarceration_by_state.csv")
incarceration = incarceration.rename(columns={"jurisdiction":"State"})

#Capitalize all states to follow the format of other datasets
for (i, state) in incarceration["State"].iteritems():
    lst = state.split() #capitalize()
    fixed= []
    for word in lst:
        word= word.capitalize()
        fixed.append(word)
    new= " ".join(fixed)
    incarceration.at[i,"State"] = new

incarceration= incarceration.rename(columns={"year": "Year"})  
incarceration= incarceration.drop("state_population", axis=1)
incarceration.groupby('State', as_index=False)
incarceration.tail(50)
Out[2]:
State Year prisoner_count incarceration rate (%)
750 Alabama 2016 23745 0.488525464
751 Alaska 2016 4378 0.590407297
752 Arizona 2016 42248 0.611523943
753 Arkansas 2016 15833 0.52984525
754 California 2016 129416 0.32933233
755 Colorado 2016 19486 0.352362207
756 Connecticut 2016 15040 0.419211832
757 Delaware 2016 6334 0.664848672
758 Florida 2016 98010 0.474473302
759 Georgia 2016 53433 0.518081915
760 Hawaii 2016 5182 0.362711672
761 Idaho 2016 7221 0.429814777
762 Illinois 2016 43616 0.339801582
763 Indiana 2016 25143 0.37900171
764 Iowa 2016 9049 0.289025188
765 Kansas 2016 9653 0.331977064
766 Kentucky 2016 11867 0.267508966
767 Louisiana 2016 15059 0.321350736
768 Maine 2016 2356 0.177111962
769 Maryland 2016 20236 0.335881045
770 Massachusetts 2016 9038 0.132449729
771 Michigan 2016 41122 0.413975212
772 Minnesota 2016 9509 0.17210704
773 Mississippi 2016 13720 0.459567598
774 Missouri 2016 32427 0.532360254
775 Montana 2016 3199 0.307994177
776 Nebraska 2016 5167 0.270863487
777 Nevada 2016 13932 0.473997824
778 New Hampshire 2016 2599 0.194679461
779 New Jersey 2016 19458 0.216719742
780 New Mexico 2016 6996 0.335470061
781 New York 2016 50611 0.255143528
782 North Carolina 2016 35970 0.354150846
783 North Dakota 2016 1639 0.21692864
784 Ohio 2016 52172 0.448885847
785 Oklahoma 2016 26367 0.672420507
786 Oregon 2016 14579 0.356804681
787 Pennsylvania 2016 48287 0.377623203
788 Rhode Island 2016 2887 0.272985327
789 South Carolina 2016 20376 0.410821195
790 South Dakota 2016 3770 0.437587488
791 Tennessee 2016 21539 0.323923768
792 Texas 2016 151276 0.542113414
793 Utah 2016 4502 0.147881909
794 Vermont 2016 1735 0.278333018
795 Virginia 2016 29882 0.355130146
796 Washington 2016 17228 0.236617994
797 West Virginia 2016 5899 0.322589995
798 Wisconsin 2016 23163 0.401235632
799 Wyoming 2016 2352 0.402113146
In [3]:
#drug overdose rate from 1999-2017
drug= pd.read_csv("Drug_Poisoning_Mortality_by_State.csv")
#drop unneeded columns
drug = drug.drop(["Sex","Age Group","Race and Hispanic Origin", "Crude Death Rate",
"Standard Error for Crude Rate","Low Confidence Limit for Crude Rate","Upper Confidence Limit for Crude Rate",
"Standard Error Age-adjusted Rate","Lower Confidence Limit for Age-adjusted Rate","Upper Confidence for Age-adjusted Rate",
"State Crude Rate in Range","US Crude Rate","US Age-adjusted Rate",
"Unit"], axis = 1)
drug= drug.rename(columns={"Age-adjusted Rate":"Overdose Rate"})

states = drug["State"]
for i in range(len(drug)):
    if states[i] == "District of Columbia":
         drug = drug.drop([i])
    if states[i] == "United States":
        drug = drug.drop([i])
drug

#to match incareration data set
drug2 = drug[drug['Year']>2000]
drug2 = drug2[drug2['Year']<2017]
drug2
Out[3]:
State Year Deaths Population Overdose Rate
2 Alabama 2001 216 4467634 4.8915
3 Alabama 2002 211 4480089 4.7619
4 Alabama 2003 197 4503491 4.4333
5 Alabama 2004 283 4530729 6.3542
6 Alabama 2005 283 4569805 6.3330
... ... ... ... ... ...
3015 Wyoming 2012 98 576412 16.7507
3016 Wyoming 2013 98 582658 17.2422
3017 Wyoming 2014 109 584153 19.3823
3018 Wyoming 2015 96 586107 16.4454
3019 Wyoming 2016 99 585501 17.5871

800 rows × 5 columns

In [4]:
#unemployment rates from 1980-2018
unemployment= pd.read_csv("unemployment_rate.csv")
#drop unneeded years data
unemployment= unemployment.drop(["Fips", "1980", "1981", "1982", "1983", "1984", "1985", "1986",
        "1987", "1988", "1989", "1990", "1991", "1992", "1993", "1994", "1995", "1996", "1997", "1998"],axis=1)
unemployment = unemployment.rename(columns={"Area":"State"})
unemployment = unemployment.drop([8,51,52,53])
unemployment.head(10)

#of len 20 (num of years), new col for state names
new = []
for i in range(50):
    for x in range(20):
        new.append(unemployment["State"].iloc[i])
#new

years = unemployment.iloc[:,1:]
years = years.transpose()
years

#single unemployment rate col
un = []
for i in range(20):
    un.append(years.iloc[i].tolist())  
#un

unemploy = []
for sublist in un:
    for item in sublist:
        unemploy.append(item)
#unemploy

#col of years
yr = [2018,2017,2016,2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003,2002,2001,2000,1999]
yr2 = []
for i in range(50):
    yr2.append(yr)  
#yr2

year = []
for sublist in yr2:
    for item in sublist:
        year.append(item)
#year

#combining
finalunemployment = pd.DataFrame({"State": new, "Year": year,"Unemployment Rate" :unemploy})
finalunemployment = finalunemployment[finalunemployment["Year"]<2018]

#to match incarceraton data
un2 = finalunemployment[finalunemployment['Year']>2000]
un2 = un2[un2['Year']<2017]
un2
Out[4]:
State Year Unemployment Rate
2 Alabama 2016 4.4
3 Alabama 2015 4.6
4 Alabama 2014 5.2
5 Alabama 2013 3.1
6 Alabama 2012 2.9
... ... ... ...
993 Wyoming 2005 3.1
994 Wyoming 2004 2.7
995 Wyoming 2003 3.0
996 Wyoming 2002 4.5
997 Wyoming 2001 5.3

800 rows × 3 columns

In [5]:
#median income from 1984-2018
income= pd.read_csv("income_fixed.csv")
#drop uneeded years
income= income.drop(["1984 (19)", "1985 (20)", "1986",
        "1987 (21)", "1988", "1989", "1990", "1991", "1992 (22)", "1993 (23)", "1994 (24)", "1995 (25)",
        "1996", "1997", "1998", "2013 (39)"],axis=1)
income = income.rename(columns={"2004(revised)":"2004"})
income = income.rename(columns={"2000 (30)":"2000"})
income = income.rename(columns={"1999 (29)":"1999"})
income = income.rename(columns={"2013 (38)":"2013"})
income = income.rename(columns={"2010 (37)":"2010"})
income = income.rename(columns={"2009 (36)": "2009"})
income = income.drop([8])
#change to wide data
#new state column (this matches the number of years so we need to figure out the specific date range)
#need to drop DC row

years = income.iloc[:,1:]
years = years.transpose()
years

#stack all 0-51 on each other for single col of income
income2 = []
for i in range(20):
    income2.append(years.iloc[i].tolist())  
income2

flat_list = []
for sublist in income2:
    for item in sublist:
        flat_list.append(item)
flat_list

#combining cols
finalincome = pd.DataFrame({"State": new, "Year": year,"Median Income" :flat_list})
finalincome = finalincome[finalincome["Year"]<2018]
finalincome

#to match incarceration data
in2 = finalincome[finalincome['Year']>2000]
in2 = in2[in2['Year']<2017]
in2
Out[5]:
State Year Median Income
2 Alabama 2016 62,283
3 Alabama 2015 49,781
4 Alabama 2014 70,489
5 Alabama 2013 73,034
6 Alabama 2012 72,812
... ... ... ...
993 Wyoming 2005 46,050
994 Wyoming 2004 41,584
995 Wyoming 2003 45,693
996 Wyoming 2002 45,473
997 Wyoming 2001 29,297

800 rows × 3 columns

In [6]:
#merging data
#dataframe 1: with all above data (drug2, un2,in2, incarceration)
new_df = pd.merge(drug2, un2,  how='left', left_on=['State','Year'], right_on = ['State','Year'])
new_df = pd.merge(new_df, in2,  how='left', left_on=['State','Year'], right_on = ['State','Year'])
new_df = pd.merge(new_df, incarceration,  how='inner', left_on=['State','Year'], right_on = ['State','Year'])
new_df.tail(50)
#incarceration rate data has some missing values
Out[6]:
State Year Deaths Population Overdose Rate Unemployment Rate Median Income prisoner_count incarceration rate (%)
750 Washington 2015 1094 7170351 14.7417 5.1 34,299 17222 0.2401835
751 Washington 2016 1102 7288000 14.5105 3.4 54,251 17228 0.236617994
752 West Virginia 2001 206 1801481 11.4696 3.8 46,628 3403 0.188953206
753 West Virginia 2002 229 1805414 12.9455 4.1 50,593 3575 0.198073671
754 West Virginia 2003 262 1812295 15.1210 3.3 48,177 3790 0.209225809
755 West Virginia 2004 333 1816438 18.8017 4.2 43,629 3990 0.220132101
756 West Virginia 2005 184 1820492 10.5007 3.7 29,682 4036 0.222481551
757 West Virginia 2006 362 1827912 20.3521 4.8 36,995 4367 0.240146937
758 West Virginia 2007 397 1834052 22.4483 6.6 51,396 4907 0.270800509
759 West Virginia 2008 459 1840310 25.7420 3.9 36,251 4896 0.269831157
760 West Virginia 2009 226 1847775 12.4329 4.2 39,629 5062 0.278165951
761 West Virginia 2010 512 1852994 28.8855 3.3 45,088 5072 0.273516368
762 West Virginia 2011 635 1855364 36.2896 5.2 29,411 5149 0.277587891
763 West Virginia 2012 558 1855413 32.0021 4.7 42,525 5335 0.287340845
764 West Virginia 2013 570 1854304 32.1989 3.7 47,163 5708 0.307942134
765 West Virginia 2014 627 1850326 35.5052 3.0 39,594 5867 0.317079261
766 West Virginia 2015 725 1844128 41.4936 3.3 47,550 5925 0.321290062
767 West Virginia 2016 884 1831102 52.0211 4.3 38,609 5899 0.322589995
768 Wisconsin 2001 269 5406835 4.9719 4.6 41,461 20766 0.384132512
769 Wisconsin 2002 340 5445162 6.1875 2.8 38,626 21666 0.398294609
770 Wisconsin 2003 383 5479203 6.9900 3.7 31,038 22153 0.404673483
771 Wisconsin 2004 437 5514026 7.8581 3.2 41,383 22152 0.402505082
772 Wisconsin 2005 518 5546166 9.3367 4.8 32,478 21921 0.396570401
773 Wisconsin 2006 593 5577655 10.6366 2.9 47,038 22564 0.406082527
774 Wisconsin 2007 622 5610775 11.0976 4.1 46,089 22839 0.407719882
775 Wisconsin 2008 606 5640996 10.6039 3.3 44,005 22282 0.395915612
776 Wisconsin 2009 631 5669264 10.9348 3.9 52,205 22262 0.393685053
777 Wisconsin 2010 630 5686986 10.9422 3.4 38,862 22033 0.387110331
778 Wisconsin 2011 698 5711767 12.0766 4.9 32,654 22388 0.392094844
779 Wisconsin 2012 692 5726398 12.2266 4.3 33,738 22419 0.391628763
780 Wisconsin 2013 856 5742713 15.0285 3.4 37,348 22443 0.390791985
781 Wisconsin 2014 853 5757564 15.0648 2.5 41,098 22572 0.392040801
782 Wisconsin 2015 878 5771337 15.4719 3.4 40,838 22914 0.397031052
783 Wisconsin 2016 1074 5778708 19.2873 4.3 46,330 23163 0.401235632
784 Wyoming 2001 22 494657 4.5032 5.3 29,297 1487 0.301162117
785 Wyoming 2002 34 500017 6.9214 4.5 45,473 1641 0.328969789
786 Wyoming 2003 30 503453 5.9028 3.0 45,693 1630 0.324629415
787 Wyoming 2004 46 509106 9.0935 2.7 41,584 1765 0.348892144
788 Wyoming 2005 26 514157 4.9027 3.1 46,050 2096 0.411951305
789 Wyoming 2006 55 522667 10.5619 3.9 38,688 2035 0.395142562
790 Wyoming 2007 63 534876 12.2666 3.5 36,522 1927 0.368571046
791 Wyoming 2008 74 546043 14.0453 3.0 35,828 1704 0.319899074
792 Wyoming 2009 60 559851 10.9508 3.4 36,462 1743 0.320245466
793 Wyoming 2010 85 563626 14.9556 4.1 42,719 2092 0.370557998
794 Wyoming 2011 85 568158 15.2082 4.3 37,758 2162 0.381065856
795 Wyoming 2012 98 576412 16.7507 4.2 40,619 2187 0.379275301
796 Wyoming 2013 98 582658 17.2422 3.4 32,683 2288 0.392302773
797 Wyoming 2014 109 584153 19.3823 4.6 39,489 2369 0.405544438
798 Wyoming 2015 96 586107 16.4454 2.6 32,663 2400 0.409481545
799 Wyoming 2016 99 585501 17.5871 3.9 37,254 2352 0.402113146
In [7]:
#dataframe 2: without incarceration data (drug, finalunemployment, finalincome) 1999-2017
new_df2 = pd.merge(drug, finalunemployment,  how='left', left_on=['State','Year'], right_on = ['State','Year'])
new_df2 = pd.merge(new_df2, finalincome,how='left', left_on=['State','Year'], right_on = ['State','Year'])
new_df2
Out[7]:
State Year Deaths Population Overdose Rate Unemployment Rate Median Income
0 Alabama 1999 169 4430143 3.8521 3.6 86,223
1 Alabama 2000 197 4447100 4.4857 4.0 58,663
2 Alabama 2001 216 4467634 4.8915 5.1 49,973
3 Alabama 2002 211 4480089 4.7619 4.5 54,555
4 Alabama 2003 197 4503491 4.4333 3.3 63,938
... ... ... ... ... ... ... ...
945 Wyoming 2013 98 582658 17.2422 3.4 32,683
946 Wyoming 2014 109 584153 19.3823 4.6 39,489
947 Wyoming 2015 96 586107 16.4454 2.6 32,663
948 Wyoming 2016 99 585501 17.5871 3.9 37,254
949 Wyoming 2017 69 579315 12.2040 4.1 39,989

950 rows × 7 columns

In [8]:
state_abbrev_map = {'Alabama': 'AL','Alaska': 'AK','American Samoa': 'AS','Arizona': 'AZ','Arkansas': 'AR','California': 'CA','Colorado': 'CO','Connecticut': 'CT','Delaware': 'DE','District of Columbia': 'DC','Florida': 'FL','Georgia': 'GA','Guam': 'GU','Hawaii': 'HI','Idaho': 'ID','Illinois': 'IL','Indiana': 'IN','Iowa': 'IA','Kansas': 'KS','Kentucky': 'KY','Louisiana': 'LA','Maine': 'ME','Maryland': 'MD','Massachusetts': 'MA','Michigan': 'MI','Minnesota': 'MN','Mississippi': 'MS','Missouri': 'MO','Montana': 'MT','Nebraska': 'NE','Nevada': 'NV','New Hampshire': 'NH','New Jersey': 'NJ','New Mexico': 'NM','New York': 'NY','North Carolina': 'NC','North Dakota': 'ND','Northern Mariana Islands':'MP','Ohio': 'OH','Oklahoma': 'OK','Oregon': 'OR','Pennsylvania': 'PA','Puerto Rico': 'PR','Rhode Island': 'RI','South Carolina': 'SC','South Dakota': 'SD','Tennessee': 'TN','Texas': 'TX',
                    'Utah': 'UT','Vermont': 'VT','Virgin Islands': 'VI','Virginia': 'VA',
                    'Washington': 'WA','West Virginia': 'WV','Wisconsin': 'WI','Wyoming': 'WY'}

# Convert state names to abbrevations
for i in range(0, len(new_df)):
    if(new_df['State'][i] in state_abbrev_map):
        new_df['State'][i] = state_abbrev_map[new_df['State'][i]]
for i in range(0, len(new_df2)):
    if(new_df2['State'][i] in state_abbrev_map):
        new_df2['State'][i] = state_abbrev_map[new_df2['State'][i]]

new_df2
Out[8]:
State Year Deaths Population Overdose Rate Unemployment Rate Median Income
0 AL 1999 169 4430143 3.8521 3.6 86,223
1 AL 2000 197 4447100 4.4857 4.0 58,663
2 AL 2001 216 4467634 4.8915 5.1 49,973
3 AL 2002 211 4480089 4.7619 4.5 54,555
4 AL 2003 197 4503491 4.4333 3.3 63,938
... ... ... ... ... ... ... ...
945 WY 2013 98 582658 17.2422 3.4 32,683
946 WY 2014 109 584153 19.3823 4.6 39,489
947 WY 2015 96 586107 16.4454 2.6 32,663
948 WY 2016 99 585501 17.5871 3.9 37,254
949 WY 2017 69 579315 12.2040 4.1 39,989

950 rows × 7 columns

In [9]:
new_df = new_df.replace(",","", regex=True)
new_df2 = new_df2.replace(",","", regex=True)
new_df2["Median Income"] = new_df["Median Income"].astype(float)
In [10]:
import seaborn as sns
import matplotlib as plt
ax = sns.regplot(x = "Population", y="Deaths", data=new_df)
In [11]:
ax = sns.regplot(x = "Population", y="Overdose Rate", data=new_df)
In [12]:
ax = sns.regplot(x = "Deaths", y="Overdose Rate", data=new_df)
In [13]:
ax = sns.regplot(x = "Unemployment Rate", y="Overdose Rate", data=new_df2)
In [14]:
ax = sns.regplot(x = "Median Income", y="Overdose Rate", data=new_df2)
In [15]:
import pathlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
In [16]:
!pip install -q git+https://github.com/tensorflow/docs
In [17]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

print(tf.__version__)
2.1.0
In [18]:
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling
In [19]:
dataset = new_df.copy()
dataset = dataset.drop(columns=["State", "Year"])
dataset = dataset.replace(',','', regex=True)
dataset = dataset.drop([510])
dataset.to_csv("out.csv")
dataset.astype(float)
dataset["Median Income"] = dataset["Median Income"].astype(float)
dataset["incarceration rate (%)"] = dataset["incarceration rate (%)"].astype(float)
In [20]:
dataset.tail()
Out[20]:
Deaths Population Overdose Rate Unemployment Rate Median Income prisoner_count incarceration rate (%)
795 98 576412 16.7507 4.2 40619.0 2187 0.379275
796 98 582658 17.2422 3.4 32683.0 2288 0.392303
797 109 584153 19.3823 4.6 39489.0 2369 0.405544
798 96 586107 16.4454 2.6 32663.0 2400 0.409482
799 99 585501 17.5871 3.9 37254.0 2352 0.402113
In [21]:
dataset.isna().sum()
Out[21]:
Deaths                    0
Population                0
Overdose Rate             0
Unemployment Rate         0
Median Income             0
prisoner_count            0
incarceration rate (%)    0
dtype: int64
In [22]:
train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)
In [23]:
sns.pairplot(train_dataset[["Deaths", "Population", "Overdose Rate", "Unemployment Rate", "Median Income", "prisoner_count", "incarceration rate (%)"]], diag_kind="kde")
Out[23]:
<seaborn.axisgrid.PairGrid at 0x127872e90>
In [24]:
train_stats = train_dataset.describe(include='all')
train_stats
train_stats.pop("Deaths")
train_stats = train_stats.transpose()
In [25]:
train_labels = train_dataset.pop('Deaths')
test_labels = test_dataset.pop('Deaths')
In [26]:
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']

normed_train_data = norm(train_dataset.astype(float))
normed_test_data = norm(test_dataset.astype(float))
normed_train_data
Out[26]:
Population Overdose Rate Unemployment Rate Median Income prisoner_count incarceration rate (%)
399 -0.006411 1.785562 0.297130 -0.809055 0.215554 0.970475
534 -0.799801 -1.318354 1.814036 -0.404389 -0.737800 -1.238721
345 0.546412 0.191922 -1.017522 1.675718 0.574533 0.368678
40 0.030071 0.551018 -0.461323 1.264319 0.464898 1.548358
642 -0.783626 -1.556770 -0.663577 -0.737407 -0.687821 0.004220
... ... ... ... ... ... ...
464 0.343555 -0.763387 1.611782 -0.790465 0.019723 -0.617597
25 -0.791429 -0.194922 -1.422030 3.154201 -0.615632 2.510684
110 -0.371317 1.526153 -0.006251 -0.890149 -0.288324 0.366038
149 0.440267 -0.631134 0.448821 0.654757 0.841140 1.190996
152 0.508087 -0.404735 0.145439 -0.361480 0.864056 1.056961

639 rows × 6 columns

In [27]:
def build_model():
    model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
    ])

    optimizer = tf.keras.optimizers.RMSprop(0.001)

    model.compile(loss='mse',
                optimizer=optimizer,
                metrics=['mae', 'mse'])
    return model

model = build_model()
In [28]:
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense (Dense)                (None, 64)                448       
_________________________________________________________________
dense_1 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 65        
=================================================================
Total params: 4,673
Trainable params: 4,673
Non-trainable params: 0
_________________________________________________________________
In [29]:
example_batch = normed_train_data[:10]
example_result = model.predict(example_batch)
example_result
Out[29]:
array([[0.12671736],
       [0.45301247],
       [0.33766353],
       [0.20201725],
       [0.5741988 ],
       [0.22445567],
       [0.3474688 ],
       [0.10230982],
       [0.13101406],
       [0.5919368 ]], dtype=float32)
In [30]:
EPOCHS = 10000

history = model.fit(
  normed_train_data, train_labels,
  epochs=EPOCHS, validation_split = 0.2, verbose=0,
  callbacks=[tfdocs.modeling.EpochDots()])
Epoch: 0, loss:1125216.2164,  mae:713.5579,  mse:1125216.3750,  val_loss:1353222.6250,  val_mae:845.8043,  val_mse:1353222.6250,  
....................................................................................................
Epoch: 100, loss:69799.9700,  mae:143.8922,  mse:69799.9688,  val_loss:76668.5361,  val_mae:163.4652,  val_mse:76668.5312,  
....................................................................................................
Epoch: 200, loss:28449.6535,  mae:80.2828,  mse:28449.6504,  val_loss:26834.7314,  val_mae:89.5464,  val_mse:26834.7324,  
....................................................................................................
Epoch: 300, loss:10873.8693,  mae:55.8535,  mse:10873.8691,  val_loss:7169.1420,  val_mae:59.1156,  val_mse:7169.1421,  
....................................................................................................
Epoch: 400, loss:4466.6197,  mae:35.2465,  mse:4466.6191,  val_loss:3011.9329,  val_mae:37.3690,  val_mse:3011.9331,  
....................................................................................................
Epoch: 500, loss:1601.5178,  mae:22.7126,  mse:1601.5178,  val_loss:1977.7000,  val_mae:28.2467,  val_mse:1977.7000,  
....................................................................................................
Epoch: 600, loss:642.4297,  mae:16.0229,  mse:642.4296,  val_loss:1813.6638,  val_mae:26.5211,  val_mse:1813.6638,  
....................................................................................................
Epoch: 700, loss:363.4655,  mae:12.1659,  mse:363.4655,  val_loss:1822.2555,  val_mae:24.5017,  val_mse:1822.2554,  
....................................................................................................
Epoch: 800, loss:278.1456,  mae:10.5957,  mse:278.1456,  val_loss:1511.9117,  val_mae:21.8126,  val_mse:1511.9117,  
....................................................................................................
Epoch: 900, loss:197.8798,  mae:8.8385,  mse:197.8798,  val_loss:1153.1827,  val_mae:19.5810,  val_mse:1153.1826,  
....................................................................................................
Epoch: 1000, loss:187.8126,  mae:8.4966,  mse:187.8126,  val_loss:904.8686,  val_mae:17.7668,  val_mse:904.8687,  
....................................................................................................
Epoch: 1100, loss:157.8038,  mae:8.1303,  mse:157.8038,  val_loss:1012.0383,  val_mae:18.5169,  val_mse:1012.0383,  
....................................................................................................
Epoch: 1200, loss:151.2538,  mae:7.8548,  mse:151.2538,  val_loss:1110.3467,  val_mae:18.4489,  val_mse:1110.3468,  
....................................................................................................
Epoch: 1300, loss:162.5755,  mae:8.0024,  mse:162.5755,  val_loss:897.4351,  val_mae:17.6132,  val_mse:897.4351,  
....................................................................................................
Epoch: 1400, loss:127.4703,  mae:7.5686,  mse:127.4703,  val_loss:1254.9246,  val_mae:19.2526,  val_mse:1254.9247,  
....................................................................................................
Epoch: 1500, loss:98.5691,  mae:6.6203,  mse:98.5691,  val_loss:1041.2206,  val_mae:17.7452,  val_mse:1041.2206,  
....................................................................................................
Epoch: 1600, loss:129.8989,  mae:7.0778,  mse:129.8989,  val_loss:956.9867,  val_mae:18.8951,  val_mse:956.9867,  
....................................................................................................
Epoch: 1700, loss:99.1496,  mae:6.5211,  mse:99.1496,  val_loss:921.6853,  val_mae:18.4385,  val_mse:921.6853,  
....................................................................................................
Epoch: 1800, loss:111.2056,  mae:6.6445,  mse:111.2056,  val_loss:908.1248,  val_mae:18.4552,  val_mse:908.1248,  
....................................................................................................
Epoch: 1900, loss:101.0218,  mae:6.4973,  mse:101.0218,  val_loss:736.4521,  val_mae:16.6848,  val_mse:736.4521,  
....................................................................................................
Epoch: 2000, loss:106.9724,  mae:6.7651,  mse:106.9724,  val_loss:1015.0822,  val_mae:18.0143,  val_mse:1015.0822,  
....................................................................................................
Epoch: 2100, loss:131.4002,  mae:7.1515,  mse:131.4002,  val_loss:968.7407,  val_mae:17.4702,  val_mse:968.7407,  
....................................................................................................
Epoch: 2200, loss:100.4850,  mae:6.3745,  mse:100.4850,  val_loss:1112.6656,  val_mae:18.5922,  val_mse:1112.6655,  
....................................................................................................
Epoch: 2300, loss:92.2523,  mae:6.5281,  mse:92.2523,  val_loss:733.4339,  val_mae:16.3694,  val_mse:733.4338,  
....................................................................................................
Epoch: 2400, loss:84.0253,  mae:5.9440,  mse:84.0253,  val_loss:783.6604,  val_mae:17.2303,  val_mse:783.6604,  
....................................................................................................
Epoch: 2500, loss:93.2952,  mae:6.3785,  mse:93.2952,  val_loss:795.7381,  val_mae:17.0333,  val_mse:795.7380,  
....................................................................................................
Epoch: 2600, loss:88.0575,  mae:5.9388,  mse:88.0575,  val_loss:673.9261,  val_mae:15.9323,  val_mse:673.9260,  
....................................................................................................
Epoch: 2700, loss:68.7171,  mae:5.4979,  mse:68.7171,  val_loss:675.3924,  val_mae:15.8142,  val_mse:675.3924,  
....................................................................................................
Epoch: 2800, loss:86.8280,  mae:5.7362,  mse:86.8280,  val_loss:761.7206,  val_mae:17.2335,  val_mse:761.7206,  
....................................................................................................
Epoch: 2900, loss:79.0164,  mae:5.8076,  mse:79.0164,  val_loss:704.3429,  val_mae:15.9221,  val_mse:704.3429,  
....................................................................................................
Epoch: 3000, loss:75.7319,  mae:5.4840,  mse:75.7319,  val_loss:661.3564,  val_mae:16.1756,  val_mse:661.3564,  
....................................................................................................
Epoch: 3100, loss:58.9295,  mae:5.0615,  mse:58.9295,  val_loss:720.0006,  val_mae:16.1052,  val_mse:720.0006,  
....................................................................................................
Epoch: 3200, loss:75.7006,  mae:5.7378,  mse:75.7006,  val_loss:680.7914,  val_mae:16.0454,  val_mse:680.7913,  
....................................................................................................
Epoch: 3300, loss:81.6809,  mae:5.6202,  mse:81.6809,  val_loss:722.1917,  val_mae:16.2925,  val_mse:722.1917,  
....................................................................................................
Epoch: 3400, loss:64.6248,  mae:4.9360,  mse:64.6248,  val_loss:872.8085,  val_mae:17.0957,  val_mse:872.8085,  
....................................................................................................
Epoch: 3500, loss:73.5436,  mae:5.3430,  mse:73.5436,  val_loss:659.8710,  val_mae:15.6498,  val_mse:659.8710,  
....................................................................................................
Epoch: 3600, loss:90.4764,  mae:5.6997,  mse:90.4764,  val_loss:614.8783,  val_mae:15.5592,  val_mse:614.8784,  
....................................................................................................
Epoch: 3700, loss:59.1709,  mae:4.8297,  mse:59.1709,  val_loss:775.1910,  val_mae:16.6644,  val_mse:775.1910,  
....................................................................................................
Epoch: 3800, loss:67.5920,  mae:5.3016,  mse:67.5920,  val_loss:876.3785,  val_mae:18.2940,  val_mse:876.3785,  
....................................................................................................
Epoch: 3900, loss:73.6563,  mae:5.4714,  mse:73.6563,  val_loss:651.0358,  val_mae:15.6667,  val_mse:651.0358,  
....................................................................................................
Epoch: 4000, loss:70.0718,  mae:5.5610,  mse:70.0718,  val_loss:978.9228,  val_mae:17.4905,  val_mse:978.9229,  
....................................................................................................
Epoch: 4100, loss:68.8130,  mae:4.8113,  mse:68.8130,  val_loss:725.5564,  val_mae:16.2964,  val_mse:725.5564,  
....................................................................................................
Epoch: 4200, loss:48.1520,  mae:4.4997,  mse:48.1520,  val_loss:886.6851,  val_mae:18.1218,  val_mse:886.6851,  
....................................................................................................
Epoch: 4300, loss:74.5812,  mae:5.3449,  mse:74.5812,  val_loss:644.5324,  val_mae:15.7529,  val_mse:644.5324,  
....................................................................................................
Epoch: 4400, loss:48.6494,  mae:4.8619,  mse:48.6494,  val_loss:612.6890,  val_mae:15.4773,  val_mse:612.6890,  
....................................................................................................
Epoch: 4500, loss:54.0296,  mae:4.9587,  mse:54.0295,  val_loss:985.5374,  val_mae:18.0507,  val_mse:985.5374,  
....................................................................................................
Epoch: 4600, loss:87.5510,  mae:5.3035,  mse:87.5510,  val_loss:645.4147,  val_mae:16.0184,  val_mse:645.4147,  
....................................................................................................
Epoch: 4700, loss:80.2062,  mae:5.4168,  mse:80.2062,  val_loss:655.0455,  val_mae:15.9423,  val_mse:655.0455,  
....................................................................................................
Epoch: 4800, loss:47.9302,  mae:4.5804,  mse:47.9302,  val_loss:1084.5606,  val_mae:20.3883,  val_mse:1084.5605,  
....................................................................................................
Epoch: 4900, loss:100.6378,  mae:5.8195,  mse:100.6378,  val_loss:617.7265,  val_mae:15.5515,  val_mse:617.7265,  
....................................................................................................
Epoch: 5000, loss:60.2516,  mae:4.7704,  mse:60.2516,  val_loss:611.5426,  val_mae:15.6381,  val_mse:611.5427,  
....................................................................................................
Epoch: 5100, loss:65.6698,  mae:5.2350,  mse:65.6698,  val_loss:781.6073,  val_mae:16.6293,  val_mse:781.6073,  
....................................................................................................
Epoch: 5200, loss:64.2719,  mae:4.9825,  mse:64.2719,  val_loss:1064.1279,  val_mae:20.0691,  val_mse:1064.1278,  
....................................................................................................
Epoch: 5300, loss:41.2251,  mae:4.0835,  mse:41.2251,  val_loss:640.1063,  val_mae:15.8742,  val_mse:640.1063,  
....................................................................................................
Epoch: 5400, loss:53.3567,  mae:4.7192,  mse:53.3567,  val_loss:1044.6352,  val_mae:20.0000,  val_mse:1044.6351,  
....................................................................................................
Epoch: 5500, loss:38.6226,  mae:4.2256,  mse:38.6226,  val_loss:646.8918,  val_mae:16.0319,  val_mse:646.8918,  
....................................................................................................
Epoch: 5600, loss:52.6891,  mae:4.5581,  mse:52.6891,  val_loss:870.4116,  val_mae:17.6639,  val_mse:870.4116,  
....................................................................................................
Epoch: 5700, loss:73.2273,  mae:5.1825,  mse:73.2273,  val_loss:627.7981,  val_mae:15.7612,  val_mse:627.7981,  
....................................................................................................
Epoch: 5800, loss:70.2397,  mae:5.2351,  mse:70.2397,  val_loss:655.4430,  val_mae:16.0011,  val_mse:655.4430,  
....................................................................................................
Epoch: 5900, loss:42.4059,  mae:4.5613,  mse:42.4059,  val_loss:669.0385,  val_mae:16.0552,  val_mse:669.0385,  
....................................................................................................
Epoch: 6000, loss:39.3535,  mae:4.1608,  mse:39.3535,  val_loss:758.5374,  val_mae:16.6774,  val_mse:758.5374,  
....................................................................................................
Epoch: 6100, loss:43.7200,  mae:4.4789,  mse:43.7200,  val_loss:659.6536,  val_mae:16.1187,  val_mse:659.6536,  
....................................................................................................
Epoch: 6200, loss:75.9719,  mae:5.5367,  mse:75.9719,  val_loss:655.7038,  val_mae:16.2829,  val_mse:655.7037,  
....................................................................................................
Epoch: 6300, loss:76.9765,  mae:5.3066,  mse:76.9766,  val_loss:864.6140,  val_mae:17.4894,  val_mse:864.6141,  
....................................................................................................
Epoch: 6400, loss:69.2852,  mae:5.2567,  mse:69.2852,  val_loss:923.5270,  val_mae:17.8030,  val_mse:923.5270,  
....................................................................................................
Epoch: 6500, loss:57.8925,  mae:4.6918,  mse:57.8925,  val_loss:607.0073,  val_mae:15.7330,  val_mse:607.0073,  
....................................................................................................
Epoch: 6600, loss:44.3429,  mae:4.2219,  mse:44.3429,  val_loss:753.1806,  val_mae:16.8016,  val_mse:753.1807,  
....................................................................................................
Epoch: 6700, loss:66.7416,  mae:5.1763,  mse:66.7416,  val_loss:635.8203,  val_mae:16.1197,  val_mse:635.8203,  
....................................................................................................
Epoch: 6800, loss:73.1945,  mae:5.0715,  mse:73.1945,  val_loss:702.6101,  val_mae:18.4850,  val_mse:702.6101,  
....................................................................................................
Epoch: 6900, loss:62.0756,  mae:5.0016,  mse:62.0756,  val_loss:597.0972,  val_mae:15.8412,  val_mse:597.0972,  
....................................................................................................
Epoch: 7000, loss:54.9166,  mae:4.8361,  mse:54.9166,  val_loss:586.6789,  val_mae:15.9604,  val_mse:586.6789,  
....................................................................................................
Epoch: 7100, loss:41.6076,  mae:4.0432,  mse:41.6076,  val_loss:611.6194,  val_mae:16.1953,  val_mse:611.6194,  
....................................................................................................
Epoch: 7200, loss:45.6879,  mae:4.1871,  mse:45.6879,  val_loss:837.1381,  val_mae:17.6136,  val_mse:837.1381,  
....................................................................................................
Epoch: 7300, loss:39.3804,  mae:3.9932,  mse:39.3804,  val_loss:573.0548,  val_mae:15.8314,  val_mse:573.0547,  
....................................................................................................
Epoch: 7400, loss:46.8800,  mae:4.3392,  mse:46.8800,  val_loss:606.9212,  val_mae:16.0794,  val_mse:606.9212,  
....................................................................................................
Epoch: 7500, loss:47.8012,  mae:4.2812,  mse:47.8012,  val_loss:1144.8518,  val_mae:21.0846,  val_mse:1144.8518,  
....................................................................................................
Epoch: 7600, loss:64.2474,  mae:5.0869,  mse:64.2474,  val_loss:741.7596,  val_mae:17.4204,  val_mse:741.7596,  
....................................................................................................
Epoch: 7700, loss:62.3529,  mae:4.6161,  mse:62.3529,  val_loss:658.8443,  val_mae:16.3873,  val_mse:658.8442,  
....................................................................................................
Epoch: 7800, loss:70.7560,  mae:4.6694,  mse:70.7560,  val_loss:737.5990,  val_mae:17.4355,  val_mse:737.5989,  
....................................................................................................
Epoch: 7900, loss:80.3263,  mae:5.1582,  mse:80.3263,  val_loss:669.2403,  val_mae:16.5276,  val_mse:669.2403,  
....................................................................................................
Epoch: 8000, loss:30.4147,  mae:3.5700,  mse:30.4147,  val_loss:642.6853,  val_mae:17.1036,  val_mse:642.6853,  
....................................................................................................
Epoch: 8100, loss:31.6655,  mae:3.6563,  mse:31.6655,  val_loss:614.3285,  val_mae:16.0618,  val_mse:614.3286,  
....................................................................................................
Epoch: 8200, loss:62.1901,  mae:4.6838,  mse:62.1901,  val_loss:562.2937,  val_mae:15.4989,  val_mse:562.2936,  
....................................................................................................
Epoch: 8300, loss:56.1971,  mae:4.6469,  mse:56.1971,  val_loss:995.3214,  val_mae:19.6873,  val_mse:995.3214,  
....................................................................................................
Epoch: 8400, loss:34.5923,  mae:3.6345,  mse:34.5923,  val_loss:624.9959,  val_mae:16.2900,  val_mse:624.9958,  
....................................................................................................
Epoch: 8500, loss:42.8617,  mae:4.1579,  mse:42.8617,  val_loss:566.4176,  val_mae:15.6664,  val_mse:566.4177,  
....................................................................................................
Epoch: 8600, loss:61.3447,  mae:4.4877,  mse:61.3447,  val_loss:619.9199,  val_mae:15.9765,  val_mse:619.9199,  
....................................................................................................
Epoch: 8700, loss:92.2475,  mae:4.7562,  mse:92.2475,  val_loss:573.6821,  val_mae:15.7057,  val_mse:573.6821,  
....................................................................................................
Epoch: 8800, loss:44.5341,  mae:4.3808,  mse:44.5341,  val_loss:795.5697,  val_mae:17.8090,  val_mse:795.5696,  
....................................................................................................
Epoch: 8900, loss:41.4801,  mae:3.8019,  mse:41.4801,  val_loss:588.2789,  val_mae:15.7732,  val_mse:588.2789,  
....................................................................................................
Epoch: 9000, loss:52.9670,  mae:4.3264,  mse:52.9670,  val_loss:640.6195,  val_mae:16.6584,  val_mse:640.6196,  
....................................................................................................
Epoch: 9100, loss:24.1696,  mae:3.5179,  mse:24.1696,  val_loss:631.2027,  val_mae:16.6088,  val_mse:631.2027,  
....................................................................................................
Epoch: 9200, loss:50.0946,  mae:4.1538,  mse:50.0946,  val_loss:603.7923,  val_mae:16.1416,  val_mse:603.7923,  
....................................................................................................
Epoch: 9300, loss:61.6532,  mae:4.9280,  mse:61.6532,  val_loss:588.8617,  val_mae:15.8740,  val_mse:588.8617,  
....................................................................................................
Epoch: 9400, loss:38.7478,  mae:3.9328,  mse:38.7478,  val_loss:676.8274,  val_mae:16.8807,  val_mse:676.8274,  
....................................................................................................
Epoch: 9500, loss:42.8250,  mae:4.2825,  mse:42.8250,  val_loss:710.6410,  val_mae:17.5646,  val_mse:710.6410,  
....................................................................................................
Epoch: 9600, loss:84.8623,  mae:5.2530,  mse:84.8623,  val_loss:609.4381,  val_mae:15.8815,  val_mse:609.4381,  
....................................................................................................
Epoch: 9700, loss:45.5502,  mae:3.8407,  mse:45.5502,  val_loss:553.6853,  val_mae:15.3028,  val_mse:553.6853,  
....................................................................................................
Epoch: 9800, loss:55.5200,  mae:4.4397,  mse:55.5200,  val_loss:616.4996,  val_mae:16.0294,  val_mse:616.4996,  
....................................................................................................
Epoch: 9900, loss:40.0513,  mae:4.1756,  mse:40.0513,  val_loss:559.1184,  val_mae:15.9484,  val_mse:559.1184,  
....................................................................................................
In [31]:
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
Out[31]:
loss mae mse val_loss val_mae val_mse epoch
9995 37.825863 3.673659 37.825867 563.617477 15.960093 563.617493 9995
9996 68.847660 4.660394 68.847656 607.868881 16.157730 607.868896 9996
9997 41.807644 3.649871 41.807648 590.925629 16.977535 590.925659 9997
9998 69.999717 4.457794 69.999725 668.808083 16.907946 668.808105 9998
9999 39.860604 3.761033 39.860603 587.420181 15.710565 587.420166 9999
In [32]:
plotter = tfdocs.plots.HistoryPlotter(smoothing_std=2)
In [33]:
plotter.plot({'Basic': history}, metric = "mae")
plt.ylabel('MAE [Deaths]')
Out[33]:
Text(0, 0.5, 'MAE [Deaths]')
In [34]:
plotter.plot({'Basic': history}, metric = "mse")
plt.ylabel('MSE [Deaths^2]')
Out[34]:
Text(0, 0.5, 'MSE [Deaths^2]')
In [35]:
test_predictions = model.predict(normed_test_data).flatten()

a = plt.axes(aspect='equal')
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [Deaths]')
plt.ylabel('Predictions [Deaths]')
Out[35]:
Text(0, 0.5, 'Predictions [Deaths]')
In [36]:
error = test_predictions - test_labels
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [Deaths]")
_ = plt.ylabel("Count")
In [37]:
import plotly
import plotly.figure_factory as ff
import geopandas
import shapely

def ncMap(dataset, values, title):
    fips = list(set(dataset['FIPS'].tolist()))
    fig = ff.create_choropleth(
        fips=fips,
        values=values,
        scope=['North Carolina'],
        show_state_data=True,
        binning_endpoints = list(np.mgrid[min(values):max(values):10j]),
        plot_bgcolor='rgb(229,229,229)',
        paper_bgcolor='rgb(229,229,229)',
        legend_title=title,
        county_outline={'color': 'rgb(255,255,255)', 'width': 0.5},)
    fig.layout.template = None
    fig.show()
In [38]:
import plotly.graph_objects as go

year = 2000
df_by_year = new_df2[(new_df2['Year']== year )]

fig = go.Figure(data=go.Choropleth(
    locations=new_df2['State'], # Spatial coordinates
    z = new_df2['Overdose Rate'].astype(float), # Data to be color-coded
    locationmode = 'USA-states', # set of locations match entries in `locations`
    colorscale = 'Reds',
    colorbar_title = "Overdose Rate",
))

fig.update_layout(
    title_text = str(year) + ' State Overdose Rates',
    geo_scope='usa', # limite map scope to USA
)
plotly.offline.iplot(fig)
#df_by_year
In [39]:
def createMapOverMultipleYears(dataframe, col_name, unit_specifier):
    data_slider = []
    max_val = max(dataframe[col_name])
    min_val = min(dataframe[col_name])
    
    for year in dataframe.Year.unique():
        df_by_year = dataframe[(dataframe['Year']== year )]

        for col in df_by_year.columns:
            df_by_year[col] = df_by_year[col].astype(str)
    
        df_by_year['text'] = df_by_year['State'] + 'Pop: ' + df_by_year['Population'] + col_name + ': ' + " " + df_by_year[col_name] + unit_specifier
    
        data_by_year = dict(
            type='choropleth',
            locations=df_by_year['State'], # Spatial coordinates
            z = df_by_year[col_name].astype(float), # Data to be color-coded
            locationmode = 'USA-states', # set of locations match entries in `locations`
            colorscale = 'Reds',
            colorbar_title = col_name + '\n' + unit_specifier,
            zmin = min_val,
            zmax = max_val)
    
        data_slider.append(data_by_year)  # I add the dictionary to the list of dictionaries for the slider

    steps = []
    for i in range(len(data_slider)):
        step = dict(method='restyle',
                    args=['visible', [False] * len(data_slider)],
                    label='Year {}'.format(i + min(dataframe['Year']))) # label to be displayed for each step (year)
        step['args'][1][i] = True
        steps.append(step)
    sliders = [dict(active=0, pad={"t": 1}, steps=steps)]
    
    layout = dict(geo=dict(scope='usa',
                           projection={'type': 'albers usa'}),
                  sliders=sliders)
    
    
    final_fig = dict(data=data_slider, layout=layout) 
    plotly.offline.iplot(final_fig)
In [40]:
createMapOverMultipleYears(new_df2, 'Overdose Rate', 'per 100,000 people')
In [41]:
createMapOverMultipleYears(new_df2, 'Population', '')
In [42]:
createMapOverMultipleYears(new_df, 'prisoner_count', '')
In [43]:
createMapOverMultipleYears(new_df2, 'Unemployment Rate', '%')